## Annotate genomic regions with the overlapping UCSC knownGene entry and its gene symbol
## author: Yaping Liu  lyping1986@gmail.com

use strict;
use warnings;

# Usage: perl <this script> <regions.bed> [annotation.bed]
#
# Reads a tab-separated file of intervals (column 0 = chromosome, column 1 =
# start, column 2 = end) and, for every row, looks for the first annotation
# record on the same chromosome that overlaps the interval.  When one is found,
# annotation column 0 is stored in output column 6 and annotation column 5 in
# output column 7 (0-based).  Rows without a hit are written back unchanged,
# and any line containing "FDR" is copied through verbatim (presumably the
# header row -- verify against the input format).
#
# The result is written to "<regions.bed>anno.bed"; a "chrom<TAB>start" line is
# echoed to STDOUT for every processed row as a progress indicator.
#
# Annotation columns as used here: 0 = record id, 1 = chromosome, 3 = start,
# 4 = end, 5 = label copied to the output (presumably the gene symbol --
# confirm against the annotation file).

# Default annotation table, used when no second argument is given.
my $DEFAULT_ANNO_FILE = "/Volumes/HD_2/Documents/workspace/data/genomic_feature/UCSC.knownGene.anno.hg19.bed";

my $file      = $ARGV[0];
my $file_anno = defined $ARGV[1] ? $ARGV[1] : $DEFAULT_ANNO_FILE;

die "Usage: $0 <regions.bed> [annotation.bed]\n"
    unless defined $file && length $file;

# Parse the annotation file once and index its records by chromosome.
# Returns a hash ref: chromosome => [ [start, end, id, label], ... ], with the
# records of each chromosome kept in file order so "first match wins" holds.
sub load_annotations {
    my ($path) = @_;

    open(my $fh, '<', $path)
        or die "Cannot open annotation file '$path': $!\n";

    my %records_for;
    while (my $line = <$fh>) {
        chomp $line;
        next if $line =~ /^\#/;          # comment / header lines
        my @cols = split /\t/, $line;
        next if @cols < 5;               # chromosome, start and end are required
        push @{ $records_for{ $cols[1] } }, [ @cols[ 3, 4, 0, 5 ] ];
    }
    close($fh);

    return \%records_for;
}

# Return the first record in $records (array ref of [start, end, id, label])
# that overlaps the closed interval [$start, $end], or nothing when none does.
sub find_overlap {
    my ($records, $start, $end) = @_;

    for my $rec (@{$records}) {
        my ($rec_start, $rec_end) = @{$rec};
        my $start_inside = $start >= $rec_start && $start <= $rec_end;
        my $end_inside   = $end   >= $rec_start && $end   <= $rec_end;
        # A query that completely spans the record has neither endpoint
        # inside it, so it needs its own test.
        my $spans_record = $start <= $rec_start && $end >= $rec_end;
        return $rec if $start_inside || $end_inside || $spans_record;
    }
    return;
}

my $annotations = load_annotations($file_anno);

open(my $in_fh, '<', $file)
    or die "Cannot open input file '$file': $!\n";

my $output_file = $file . "anno.bed";
open(my $out_fh, '>', $output_file)
    or die "Cannot open output file '$output_file' for writing: $!\n";

while (my $line = <$in_fh>) {
    chomp $line;

    # Lines mentioning FDR are passed through untouched.
    if ($line =~ /FDR/) {
        print {$out_fh} "$line\n";
        next;
    }

    my @fields = split /\t/, $line;
    my ($chrom, $start, $end) = @fields;

    if (defined $chrom && defined $start && exists $annotations->{$chrom}) {
        # A row without an end column is treated as a single position.
        $end = $start unless defined $end;
        my $hit = find_overlap($annotations->{$chrom}, $start, $end);
        if ($hit) {
            $fields[6] = $hit->[2];
            $fields[7] = $hit->[3];
        }
    }

    {
        # Short rows leave gaps before columns 6/7 (and blank lines have no
        # columns at all); those gaps are deliberately emitted as empty fields.
        no warnings 'uninitialized';
        print "$fields[0]\t$fields[1]\n";
        print {$out_fh} join("\t", @fields), "\n";
    }
}

close($in_fh);
# Buffered write errors only surface when the handle is closed.
close($out_fh)
    or die "Error while writing '$output_file': $!\n";